#! /bin/bash
# ---- start-up trace: timestamp, path of this prolog, working directory ----
{
	date
	echo "my prolog=$0"
	echo "my workdir=$(pwd)"
} >> $LOGFILE

# Root of the Slurm installation; startRoleContainer.sh is looked up below it.
SLURM_INSTALL_PATH=/opt/gridview/slurm

# ---- job resources, read from the job detail ($JOBDETAIL) ----
# The three helpers are defined outside this section. Per the original notes
# they yield the head node's CPU ids, GPU ids and memory
# (NOTE(review): value formats are not visible here - confirm in the helpers).
DOCKER_CPUIDS=$(getFinalNodeCpus $JOBDETAIL)
DOCKER_GPUIDS=$(getFinalNodeGpus $JOBDETAIL)
DOCKER_CTRMEM=$(getFinalNodeMem $JOBDETAIL)
{
	echo "DOCKER_CPUIDS=$DOCKER_CPUIDS"
	echo "DOCKER_GPUIDS=$DOCKER_GPUIDS"
	echo "DOCKER_CTRMEM=$DOCKER_CTRMEM"
} >> $LOGFILE

# ---- container name used for the single-container case ----
DOCKER_NAME=$(getDockerName)
echo "DOCKER_NAME=$DOCKER_NAME" >> $LOGFILE
# ===================
# Fill in the global docker parameters used to start one role container.
#
# Arguments: $1 - container index, stored in the global `index`
# Reads:     DOCKER_CTRMEM, TASK_PATH, sharing_path, SOTHISAI_HOME,
#            extra_mount_paths, USERHOME, LOGFILE
# Sets:      index, docker_shmsize, docker_framework_ports_map,
#            docker_framework_env, motd_mount, ib_driver_mount,
#            docker_framework_mounts, docker_cmd_arg
initRoleContainerParams(){
    index=$1

    # No framework-specific environment entries or port mappings are added.
    docker_framework_env=""
    docker_framework_ports_map=""

    # The shm size simply reuses the container memory value
    # (original note: probably unnecessary for CPU-only jobs).
    docker_shmsize="$DOCKER_CTRMEM"
    echo "docker_shmsize=$docker_shmsize" >> $LOGFILE

    # Bind mounts: the task's motd file, the shared IB driver directory
    # (read-only), the SothisAI instance scripts and any extra mounts.
    motd_mount="-v ${TASK_PATH}/motd:/etc/motd"
    ib_driver_mount="-v ${sharing_path}/ib_driver/:/opt/ib_driver/:ro "
    docker_framework_mounts="-v ${SOTHISAI_HOME}/scripts/scheduler/slurm/instance/:/opt/SothisAI/ ${extra_mount_paths} ${motd_mount} ${ib_driver_mount}"

    # Command string for the container. It dumps the environment of PID 1
    # (minus HOME) as export lines into /etc/profile.d/sothisai.sh, appends
    # HOME=$USERHOME, wraps the values in double quotes, sets "UseDNS no" in
    # sshd_config, drops and optionally re-adds the ai_proxy source line in
    # the bashrc files, then runs sshd in the foreground.
    # The stacked backslashes are presumably consumed by later shell
    # evaluations before the command reaches the container - TODO confirm.
    # final: sh -c "cat /proc/1/environ| tr '\0' '\n' | grep -v HOME= | awk -v ex='export ' '{printf ex;print $1}'> /etc/profile.d/sothisai.sh;/usr/sbin/sshd -D"
    docker_cmd_arg="cat /proc/1/environ| tr '\0' '\n' | grep -v HOME= | awk -v ex='export ' '{printf ex;print \\\$1}'> /etc/profile.d/sothisai.sh;echo \\\\\\\"export HOME=$USERHOME\\\\\\\" >> /etc/profile.d/sothisai.sh;sed -i -e 's/=/=\\\\\\\"/' -e 's/$/\\\\\\\"/' /etc/profile.d/sothisai.sh; sed -i 's/.*UseDNS.*/UseDNS no/' /etc/ssh/sshd_config ; sed -i '/ai_proxy/d' /etc/bashrc /etc/bash.bashrc >/dev/null 2>&1 ; if [ -e $USERHOME/.ai_user_info/ai_proxy ];then echo 'source $USERHOME/.ai_user_info/ai_proxy' | tee -a /etc/bashrc /etc/bash.bashrc ; fi ;/usr/sbin/sshd -D"
}

# ***************************

# Usage: ${taskUser} ${taskImage} $dcName $dcCpuIDs ${taskMem} $dcGpuIDs $taskRole $taskx $taskNode ${accelerator_type} <alias> <shared hosts file>
#
# Start one role container through the sothisai startRoleContainer.sh script.
#
# Arguments (exactly 12, in the order used by StartRoledTasks):
#   $1 user       $2 image        $3 container name   $4 CPU ids
#   $5 memory     $6 GPU ids      $7 role             $8 task index
#   $9 node       $10 accelerator type   $11 alias    $12 shared hosts file
# Globals read:    LOGFILE, TASK_PATH, SLURM_JOB_ID, SLURM_INSTALL_PATH
# Globals written: DC_NAME, install_path (plus everything set by
#                  initRoleContainerParams)
# Returns: 1 when the argument count is not 12, otherwise the status of the
#          final squeue call.
#
function startRoleContainer(){
	echo "Arguments of startRoleContainer: $*" >> "$LOGFILE"
	echo "TASK_PATH:$TASK_PATH" >> "$LOGFILE"
	echo "SLURM_JOB_ID:$SLURM_JOB_ID" >> "$LOGFILE"

	if [ $# -ne 12 ]; then
		echo "Arguments of startRoleContainer not valid " >> "$LOGFILE"
		return 1
	fi

	initRoleContainerParams "$8"
	DC_NAME=$3

	# Forward the arguments verbatim. An unquoted $* would re-split them and
	# drop empty ones (e.g. an empty GPU id list), so the callee would see
	# fewer than the 12 positions validated above.
	createRoleContainerParamsFile "$@"

	echo "sh $SLURM_INSTALL_PATH/etc/sothisai/startRoleContainer.sh $TASK_PATH $DC_NAME $SLURM_JOB_ID" >> "$LOGFILE"
	# NOTE(review): the trailing "2" is handed to the script as a fourth
	# argument and is not part of the logged command line - confirm that it
	# is intended.
	sh "$SLURM_INSTALL_PATH/etc/sothisai/startRoleContainer.sh" "$TASK_PATH" "$DC_NAME" "$SLURM_JOB_ID" 2

	# Parent directory of the Slurm install path; kept global as before.
	install_path=${SLURM_INSTALL_PATH%/*}
	squeue >> "$LOGFILE"
}
# =========================================================

#
# get task cpu IDs of specified task from record file
# Usage: $0 <task id> <task record file>
#
# Record format: INDEX=1,CPUIDS=2:3,GPUIDS=1
#
function get_task_cpu_ids()
{
	# $1 - task id to look up, $2 - path of the task record file.
	# Prints the CPU ids of the matching record (e.g. "2:3") without a
	# trailing newline; prints nothing when no record matches.
	# Returns 1 on wrong argument count, 0 otherwise.
	if [ $# -ne 2 ]; then
		echo "Usage: $0 <task id> <task record file>" >&2
		return 1
	fi

	# The first comma-separated field must equal "INDEX=<id>" exactly; the
	# second field is "CPUIDS=<ids>", of which the part after '=' is printed.
	# The file is supplied by quoted redirection so that a path containing
	# whitespace, or a '=' that awk would read as a variable assignment,
	# is still opened correctly.
	awk -F, -v MATCH="INDEX=${1}" \
		'$1 == MATCH { split($2, arr, "="); printf("%s", arr[2]) }' < "${2}"
	return 0
}

#
# get task GPU IDs of specified task from record file
# Usage: $0 <task id> <task record file>
#
# Record format: INDEX=1,CPUIDS=2:3,GPUIDS=1
#
function get_task_gpu_ids()
{
	# $1 - task id to look up, $2 - path of the task record file.
	# Prints the GPU ids of the matching record (e.g. "1", or "-" when the
	# record holds none) without a trailing newline; prints nothing when no
	# record matches. Returns 1 on wrong argument count, 0 otherwise.
	if [ $# -ne 2 ]; then
		echo "Usage: $0 <task id> <task record file>" >&2
		return 1
	fi

	# The first comma-separated field must equal "INDEX=<id>" exactly; the
	# third field is "GPUIDS=<ids>", of which the part after '=' is printed.
	# The file is supplied by quoted redirection so that a path containing
	# whitespace, or a '=' that awk would read as a variable assignment,
	# is still opened correctly.
	awk -F, -v MATCH="INDEX=${1}" \
		'$1 == MATCH { split($3, arr, "="); printf("%s", arr[2]) }' < "${2}"
	return 0
}

#
# schedule resources (cpu/gpu) to each task individually
# Usage: $0 <comma-separated tasks> <cpu per task> <gpu per task> <file to save result>
#
function schedTaskResources()
{
	# Hands out CPU and GPU ids to tasks, one task after another, and writes
	# one record per task to the result file:
	#     INDEX=<task>,CPUIDS=<a:b:...>,GPUIDS=<a:b:...>
	# A task that receives no id of a kind gets "-" in that field.
	#
	#   $1 comma-separated task indexes   $2 CPUs per task
	#   $3 GPUs per task                  $4 result file (recreated)
	#
	# allCpuIDs / allGpuIDs are NOT declared here: they are the caller's
	# space-separated pools and are consumed (shrunk) by this function.
	# The loop variables taskx, cpux and gpux are not local either.
	if [ $# -ne 4 ]; then
		echo "Usage: $0 <comma-separated tasks> <cpu per task> <gpu per task> <file to save result>" >&2
		return 1
	fi

	local task_ids="${1//,/ }"
	local cpus_per_task="${2}"
	local gpus_per_task="${3}"
	local record_file="${4}"

	{
		echo "ArgV[0] TASK_INDEXS:${task_ids}"
		echo "ArgV[1] TASK_CPU_NUM:${cpus_per_task}"
		echo "ArgV[2] TASK_GPU_NUM:${gpus_per_task}"
		echo "ArgV[3] save_file:${record_file}"
	} >> $LOGFILE

	# Start from an empty result file.
	if [ -f ${record_file} ]; then
		rm -f ${record_file}
	fi

	local cpu_pick="" gpu_pick=""      # ids granted to the current task
	local cpu_rest="" gpu_rest=""      # ids handed on to the next task
	local cpu_quota="" gpu_quota=""    # ids still owed to the current task
	local cpu_field gpu_field          # colon-separated form for the record

	for taskx in $task_ids
	do
		cpu_quota=${cpus_per_task}
		gpu_quota=${gpus_per_task}

		# ---- CPUs: take ids from the front of the pool until the quota is met
		{
			echo "before sched cpu, allCpuIDs:${allCpuIDs}"
			echo "before sched cpu, leftCpuIDs:${cpu_rest}"
		} >> $LOGFILE

		cpu_pick=""
		for cpux in $allCpuIDs
		do
			echo "      cpuneed:${cpu_quota}" >> $LOGFILE
			if [ $cpu_quota -gt 0 ]; then
				cpu_pick="${cpu_pick:+${cpu_pick},}${cpux}"
				(( cpu_quota -= 1 ))
			else
				cpu_rest="${cpu_rest:+${cpu_rest} }${cpux}"
			fi
		done

		allCpuIDs="$cpu_rest"
		cpu_rest=""
		echo "allCpuIDs changed to: ${allCpuIDs}" >> $LOGFILE

		# ---- GPUs: same procedure on the GPU pool
		{
			echo "before sched GPU, allGpuIDs:${allGpuIDs}"
			echo "before sched GPU, leftGpuIDs:${gpu_rest}"
		} >> $LOGFILE

		gpu_pick=""
		for gpux in $allGpuIDs
		do
			echo "      gpuneed:${gpu_quota}" >> $LOGFILE
			if [ $gpu_quota -gt 0 ]; then
				gpu_pick="${gpu_pick:+${gpu_pick},}${gpux}"
				(( gpu_quota -= 1 ))
			else
				gpu_rest="${gpu_rest:+${gpu_rest} }${gpux}"
			fi
		done

		allGpuIDs="$gpu_rest"
		gpu_rest=""
		echo "allGpuIDs changed to: ${allGpuIDs}" >> $LOGFILE

		{
			echo " ->-> dcCpuIDs=$cpu_pick"
			echo " ->-> dcGpuIDs=$gpu_pick"
		} >> $LOGFILE

		# An empty grant is recorded as "-"; ids are joined with ':' because
		# ',' already separates the fields of a record.
		cpu_field=${cpu_pick:--}
		cpu_field=${cpu_field//,/:}
		echo " ->-> rcCpuIDs=${cpu_field}" >> $LOGFILE

		gpu_field=${gpu_pick:--}
		gpu_field=${gpu_field//,/:}
		echo " ->-> rcGpuIDs=${gpu_field}" >> $LOGFILE

		echo "INDEX=${taskx},CPUIDS=${cpu_field},GPUIDS=${gpu_field}" >> $record_file

	done
}

#
# start all tasks with specified role
# Usage: $0 <SLURM_JOB_USER> <SLURM_JOB_ID> <TASK_NODE> <IMAGENAME> <TASK_INDEXS> <TASK CPU NUM> <TASK MEM> <TASK GPU NUM>
#
function StartRoledTasks()
{
	# Starts one "worker" container per task index on this node.
	#
	#   $1 job user     $2 job id          $3 node name      $4 image
	#   $5 comma-separated task indexes    $6 CPUs per task
	#   $7 memory per task (an "M" suffix is appended here)
	#   $8 GPUs per task
	#
	# Relies on variables that are not declared here: temp_file (task record
	# file, provided by the caller), TASK_PATH and accelerator_type.
	# Sets the globals TASK_INDEXS, TASK_INDEXS_arr, TASK_INDEXS_length,
	# taskx and dcName.
	# Returns 1 on wrong argument count; otherwise the status of the final
	# temp-file cleanup.

	if [ $# -ne 8 ]; then
		echo "Usage: $0 <SLURM_JOB_USER> <SLURM_JOB_ID> <TASK_NODE> <IMAGENAME> <TASK_INDEXS> <TASK CPU NUM> <TASK MEM> <TASK GPU NUM>" >&2
		return 1
	fi

	local taskUser=$1
	local taskJobID=$2
	local taskNode=$3
	local taskImage=$4
	local taskIndexList=$5
	local taskCpuNum=$6
	local taskMem="${7}M"
	local taskGpuNum=$8

	local dcCpuIDs=""
	local dcGpuIDs=""
	local dockerInfo=""

	# Split this node's CPU/GPU ids among the tasks; the result is written
	# to the record file and read back per task further down.
	schedTaskResources "${taskIndexList}" "${taskCpuNum}" "${taskGpuNum}" "${temp_file}"

	TASK_INDEXS="$(echo ${taskIndexList} | sed 's/,/ /g')"
	TASK_INDEXS_arr=($TASK_INDEXS)
	TASK_INDEXS_length=${#TASK_INDEXS_arr[@]}

	# Prepare the status files in advance: one per container, created in the
	# "waiting" state. The directory is created once with -p so that an
	# already existing directory (second task, or another node working on
	# the same TASK_PATH) is not reported as an error.
	mkdir -p "${TASK_PATH}/container_names/"
	for taskx in $TASK_INDEXS
	do
		dcName="${taskJobID}_${taskNode}_worker_$taskx"
		echo "ALIAS=worker-${taskx},CPU_USAGE=0,GPU_USAGE=0,RAM_USAGE=0,GPU_MEM_USAGE=0,TYPE=worker,INDEX=${taskx},NODE=${taskNode},NAME=${dcName},STATUS=waiting,IPADDR=-" > "$TASK_PATH/container_names/$dcName"
	done
	chown -R "$taskUser" "$TASK_PATH/container_names/"

	for taskx in $TASK_INDEXS
	do
		dcName="${taskJobID}_${taskNode}_worker_$taskx"
		# Records store ids colon-separated; the container wants commas.
		dcCpuIDs=$(get_task_cpu_ids "${taskx}" "${temp_file}")
		dcCpuIDs=${dcCpuIDs//:/,}
		dcGpuIDs=$(get_task_gpu_ids "${taskx}" "${temp_file}")
		dcGpuIDs=${dcGpuIDs//:/,}
		# The argument list is deliberately left exactly as before:
		# startRoleContainer insists on 12 arguments, so an empty unquoted
		# value still causes that container to be rejected.
		startRoleContainer ${taskUser} ${taskImage} $dcName $dcCpuIDs ${taskMem} "$dcGpuIDs" worker $taskx $taskNode ${accelerator_type} worker-${taskx} ${TASK_PATH}/shared_hosts
	done
	test -f "${temp_file}" && rm -f "${temp_file}"
}

function DistributeStart()
{
	# Works out which tasks belong to this node and starts their containers.
	# Takes no arguments; returns the status of StartRoledTasks.

	# Load the container resource settings of this task, then resolve the
	# nodes associated with it.
	# NOTE(review): the sourced file presumably provides APPLY_NODE_NUMBER,
	# WK_TOTAL_NUMBER, IMAGENAME, CPU_NUMBER, MEM_SIZE and GPU_NUMBER, which
	# are used below but never assigned in this function - confirm.
	source $TASK_PATH/containers_resource_info
	SLURM_NODES=$($SCONTROL show hostname $SLURM_NODELIST)

	# Optional framework-specific helper functions.
	local subfunc="$FRAMEWORK_HOME/subfunction.sh"
	if test -f $subfunc; then
		source $subfunc
	fi

	# Index of this node within the job.
	CUR_NODE_INDEX=$(getNodeIndexOfList $SLURM_JOB_ID $SLURMD_NODENAME)

	# Container (task) indexes associated with this node.
	local WK_TASK_INDEXS=""
	WK_TASK_INDEXS=$(get_task_indexs_of_nodex ${CUR_NODE_INDEX} ${APPLY_NODE_NUMBER} ${WK_TOTAL_NUMBER})

	# CPU / GPU ids of this node, turned from comma- into space-separated
	# lists. They stay local here but are read and consumed further down the
	# call chain (schedTaskResources), as is temp_file.
	local allCpuIDs=$(getSpecifiedJobNodeCpuIDs $JOBDETAIL $SLURMD_NODENAME)
	allCpuIDs=${allCpuIDs//,/ }
	local allGpuIDs=$(getSpecifiedJobNodeGpuIDs $JOBDETAIL $SLURMD_NODENAME)
	allGpuIDs=${allGpuIDs//,/ }
	#echo "$allGpuIDs" >> /tmp/s
	#echo "$allCpuIDs" >> /tmp/s

	# NOTE: should be located $SPOOL_DIR
	local temp_file="/tmp/tasks.${SLURM_JOB_ID}.$$"

	StartRoledTasks ${SLURM_JOB_USER} ${SLURM_JOB_ID} ${SLURMD_NODENAME} ${IMAGENAME} ${WK_TASK_INDEXS} ${CPU_NUMBER} ${MEM_SIZE} ${GPU_NUMBER}
}
echo "distributed: $distributed" >> "$LOGFILE"
log_prolog_message "==Deploy container environment=="

# Start the containers that belong to this node.
DistributeStart

# Only the header node collects the per-container status files into the
# docker instance list and extends the shared hosts file.
HEADER_NODE=$(getHeaderNode)
if [ "$SLURMD_NODENAME" = "$HEADER_NODE" ]; then
	while true
	do
		# One status file per container is expected under container_names;
		# proceed once their number equals WK_TOTAL_NUMBER.
		ready_containers_number=$(ls "$TASK_PATH/container_names" | wc -l)
		if [ "$ready_containers_number" -eq "$WK_TOTAL_NUMBER" ]; then
			dockerlistfile=$(getDockerInstanceFilePath "$TASK_PATH" "$SLURM_JOB_ID")
			cat "$TASK_PATH"/container_names/* > "$dockerlistfile"
			# For every record, join field 13 and field 1 with '=', split the
			# result on '=' and emit "<piece 2> <piece 4>", i.e. the value of
			# field 13 followed by the value of field 1 (the alias).
			# NOTE(review): the status files written by StartRoledTasks have
			# 11 fields, so field 13 is presumably added by another script
			# before this point - confirm.
			awk -F, '{print $13"="$1}' < "$dockerlistfile" | awk -F= '{print $2" "$4}' >> "$TASK_PATH/shared_hosts"
			break
		fi
		# Pause between polls; without it this loop spins a CPU core and
		# forks ls/wc continuously while waiting for the other nodes.
		sleep 1
	done

fi
echo "over" >> "$LOGFILE"
log_prolog_message "==Deploy environment completed=="
date >> "$LOGFILE"
#
# No other SLURM jobs, purge all remaining processes of this user
#
